This is a study of compartment-dependent heterogeneity in pancreatic cancer. In conclusion, we can reveal that intratumor heterogeneity in many cases is related to the invasion front properties; Tumor invading stroma or tumor invading lobes - the acinar invasion.
31 cases had 6 immunohistochemistry markers quantified in QuPath, 4 related to the basal like subtype (CK/KRT17, CK/KRT5, CA125 and HMGA2) and 2 related to the classical subtype (Muc5 and CDX2, which also relates to intestinal expression). Not all stains were available for all cases, which is stated in the supplementary material.
For each case, up to 9 regions of interest (ROIs) for “tumor in septa” (=stroma) and 9 ROIs representing “tumor in acinar (=lobular) invasion”. The ROIs are overlapping across the different stains. All quantifications extracted are only including tumor cells. Data cleanup include removal of ev. annotations in the images that helped correct ROI setting.
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(rstatix)
##
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
##
## filter
library(reactable)
library(ggthemes)
library(ggpubr)
## Loading required package: ggplot2
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(reshape2)
##
## Attaching package: 'reshape2'
## The following objects are masked from 'package:data.table':
##
## dcast, melt
## The following object is masked from 'package:tidyr':
##
## smiths
library(corrplot)
## corrplot 0.95 loaded
library(patchwork)
library(ggplot2)
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.22.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(viridis)
## Loading required package: viridisLite
library(RColorBrewer)
library(ggVennDiagram)
##
## Attaching package: 'ggVennDiagram'
## The following object is masked from 'package:tidyr':
##
## unite
library(dichromat)
library(survminer)
library(BioVenn)
library(Hmisc)
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
library(stringr)
library(forcats)
library(ggridges)
library(hrbrthemes)
reactable(stat.test.CK17_m, striped = T, bordered = T, defaultPageSize = 16, showPageInfo = FALSE, showPageSizeOptions = TRUE, filterable = T)
reactable(stat.test.Muc5_m, striped = T, bordered = T, defaultPageSize = 16, showPageInfo = FALSE, showPageSizeOptions = TRUE, filterable = T)
reactable(stat.test.CA125_m, striped = T, bordered = T, defaultPageSize = 16, showPageInfo = FALSE, showPageSizeOptions = TRUE, filterable = T)
reactable(stat.test.CK5_m, striped = T, bordered = T, defaultPageSize = 16, showPageInfo = FALSE, showPageSizeOptions = TRUE, filterable = T)
reactable(stat.test.HMGA2_m, striped = T, bordered = T, defaultPageSize = 16, showPageInfo = FALSE, showPageSizeOptions = TRUE, filterable = T)
reactable(stat.test.CDX2_m, striped = T, bordered = T, defaultPageSize = 16, showPageInfo = FALSE, showPageSizeOptions = TRUE, filterable = T)
reactable(all_combined, striped = T, bordered = T, defaultPageSize = 16, showPageInfo = FALSE, showPageSizeOptions = TRUE, filterable = T)
Separate for each stain, faceted on each case.
Each dot = 1 ROI.
boxes_ck17 <- ggboxplot(all_CK17, x = "Tissue_compartment", y = "Tumor..Positive..",
color = "Tissue_compartment", palette = "Set2",
add = "jitter",
facet.by = "PKR", ncol = 4, short.panel.labs = TRUE) +
labs(title = "Relative PDAC CK17+ cells in Lobule and Stroma", subtitle = "All cases") +
theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"), plot.subtitle =element_text(hjust = 0.5, size = 12))+
xlab(NULL) + ylab("% CK17+ tumor cells") +
ylim(-1, 120) +
theme(plot.margin = margin(0.2, 0.2, 0.2, 0.2, "cm"))
stat.test_CK17_1 <- stat.test_CK17 %>% add_xy_position(x = "Tissue_compartment")
Suppl_Fig_4b <- boxes_ck17 +
stat_pvalue_manual(label = "p.adj.signif",
stat.test_CK17_1, tip.length = 0) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Suppl_Fig_4b
boxes_ck5 <- ggboxplot(all_CK5, x = "Tissue_compartment", y = "Tumor..Positive..",
color = "Tissue_compartment", palette = "Set2",
add = "jitter",
facet.by = "PKR", ncol = 4, short.panel.labs = TRUE) +
labs(title = "Relative PDAC CK5+ cells in Lobule and Stroma", subtitle = "All cases") +
xlab(NULL) + ylab("% CK5+ tumor cells") +
theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"), plot.subtitle =element_text(hjust = 0.5, size = 12)) +
theme(axis.text=element_text(size=12),
axis.title=element_text(size=12)) +
ylim(-1, 120) +
theme(plot.margin = margin(0.2, 0.2, 0.2, 0.2, "cm"))
stat.test_CK5_1 <- stat.test_CK5 %>% add_xy_position(x = "Tissue_compartment")
Suppl_5b <- boxes_ck5 +
stat_pvalue_manual(label = "p.adj.signif",
stat.test_CK5_1, tip.length = 0) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Suppl_5b
boxes_ca125 <- ggboxplot(all_CA125, x = "Tissue_compartment", y = "Tumor..Positive..",
color = "Tissue_compartment",
add = "jitter",
facet.by = "PKR", ncol = 4, short.panel.labs = TRUE) +
labs(title = "Relative PDAC CA125+ cells in Lobule and Stroma", subtitle = "All cases") +
xlab(NULL) + ylab("% CA125+ tumor cells") +
theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"), plot.subtitle =element_text(hjust = 0.5, size = 12)) +
theme(axis.text=element_text(size=12),
axis.title=element_text(size=12)) +
ylim(-1, 120) +
theme(plot.margin = margin(0.2, 0.2, 0.2, 0.2, "cm")) +
scale_color_manual(values = c("#FC8D62", "#66C2A5"))
stat.test_CA125_1 <- stat.test_CA125 %>% add_xy_position(x = "Tissue_compartment")
Suppl_Fig_7b <- boxes_ca125 + stat_pvalue_manual(label = "p.adj.signif",
stat.test_CA125_1, tip.length = 0) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Suppl_Fig_7b
boxes_HMGA2 <- ggboxplot(all_HMGA2, x = "Tissue_compartment", y = "Tumor..Positive..",
color = "Tissue_compartment",
add = "jitter",
facet.by = "PKR", ncol = 3, short.panel.labs = TRUE) +
labs(title = "Relative PDAC HMGA2+ cells in Lobule and Stroma", subtitle = "All cases") +
xlab(NULL) + ylab("% HMGA2+ tumor cells") +
theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"), plot.subtitle =element_text(hjust = 0.5, size = 12)) +
theme(axis.text=element_text(size=12), axis.title=element_text(size=12)) +
ylim(-1, 120) +
theme(plot.margin = margin(0.2, 0.2, 0.2, 0.2, "cm"))+
scale_color_manual(values = c("#FC8D62", "#66C2A5"))
stat.test_HMGA2_1 <- stat.test_HMGA2 %>% add_xy_position(x = "Tissue_compartment")
Suppl_Fig_6b <- boxes_HMGA2 + stat_pvalue_manual(label = "p.adj.signif",
stat.test_HMGA2_1, tip.length = 0) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Suppl_Fig_6b
boxes_Muc5 <- ggboxplot(all_Muc5, x = "Tissue_compartment", y = "Tumor..Positive..",
color = "Tissue_compartment",
add = "jitter",
facet.by = "PKR", ncol = 4, short.panel.labs = TRUE) +
labs(title = "Relative PDAC Muc5+ cells in Lobule and Stroma", subtitle = "All cases") +
xlab(NULL) + ylab("% Muc5+ tumor cells") +
theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"), plot.subtitle =element_text(hjust = 0.5, size = 12)) +
theme(axis.text=element_text(size=12),
axis.title=element_text(size=12)) +
ylim(-1, 120) +
theme(plot.margin = margin(0.2, 0.2, 0.2, 0.2, "cm"))+
scale_color_manual(values = c("#FC8D62", "#66C2A5"))
stat.test_Muc5_1 <- stat.test_Muc5 %>% add_xy_position(x = "Tissue_compartment")
Suppl_Fig_8b <- boxes_Muc5 + stat_pvalue_manual(label = "p.adj.signif",
stat.test_Muc5_1, tip.length = 0) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Suppl_Fig_8b
boxes_CDX2 <- ggboxplot(all_CDX2, x = "Tissue_compartment", y = "Tumor..Positive..",
color = "Tissue_compartment", palette = "Set2",
add = "jitter",
facet.by = "PKR", ncol = 4, short.panel.labs = TRUE) +
labs(title = "Relative PDAC CDX2+ cells in Lobule and Stroma", subtitle = "All cases") +
xlab(NULL) + ylab("% CDX2+ tumor cells") +
theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"), plot.subtitle =element_text(hjust = 0.5, size = 12)) +
theme(axis.text=element_text(size=12),
axis.title=element_text(size=12)) +
ylim(-1, 120) +
theme(plot.margin = margin(0.2, 0.2, 0.2, 0.2, "cm"))
stat.test_CDX2_1 <- stat.test_CDX2 %>% add_xy_position(x = "Tissue_compartment")
Suppl_Fig_9b <- boxes_CDX2 + stat_pvalue_manual(label = "p.adj.signif",
stat.test_CDX2_1, tip.length = 0) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Suppl_Fig_9b
Only cases significant difference between lobular - and stromal
invasion.
Each dot = 1 ROI.
Correlation plot of the markers in IHC cohort, No imputation, enabled by using the data matrix all_combined_exl, and only using complete obervations.
head(cor_A)
## CA125 CDX2 CK17 CK5 HMGA2 Muc5
## CA125 1.0000000 -0.2810753 0.8707811 0.4855878 0.8797215 -0.5688701
## CDX2 -0.2810753 1.0000000 -0.4758926 -0.2144643 -0.2169090 0.4749056
## CK17 0.8707811 -0.4758926 1.0000000 0.5885561 0.7987999 -0.5422339
## CK5 0.4855878 -0.2144643 0.5885561 1.0000000 0.2762256 -0.4209848
## HMGA2 0.8797215 -0.2169090 0.7987999 0.2762256 1.0000000 -0.5028824
## Muc5 -0.5688701 0.4749056 -0.5422339 -0.4209848 -0.5028824 1.0000000
head(testRes)
## $p
## CA125 CDX2 CK17 CK5 HMGA2
## CA125 0.000000e+00 2.888282e-03 3.153729e-11 1.384304e-05 1.048558e-15
## CDX2 2.888282e-03 0.000000e+00 4.599406e-05 4.587324e-03 3.733383e-06
## CK17 3.153729e-11 4.599406e-05 0.000000e+00 1.907032e-09 2.601302e-09
## CK5 1.384304e-05 4.587324e-03 1.907032e-09 0.000000e+00 1.052497e-13
## HMGA2 1.048558e-15 3.733383e-06 2.601302e-09 1.052497e-13 0.000000e+00
## Muc5 1.673417e-01 1.340409e-03 3.937881e-16 4.357883e-04 3.447229e-09
## Muc5
## CA125 1.673417e-01
## CDX2 1.340409e-03
## CK17 3.937881e-16
## CK5 4.357883e-04
## HMGA2 3.447229e-09
## Muc5 0.000000e+00
##
## $lowCI
## CA125 CDX2 CK17 CK5 HMGA2 Muc5
## CA125 1.0000000 -0.33160594 0.2518562 0.1916069 0.5673797 -0.17920192
## CDX2 -0.3316059 1.00000000 -0.3120629 -0.3583226 -0.4887262 0.06676752
## CK17 0.2518562 -0.31206294 1.0000000 0.2600504 0.2898819 -0.43224971
## CK5 0.1916069 -0.35832262 0.2600504 1.0000000 0.4311637 -0.34083972
## HMGA2 0.5673797 -0.48872617 0.2898819 0.4311637 1.0000000 -0.52379566
## Muc5 -0.1792019 0.06676752 -0.4322497 -0.3408397 -0.5237957 1.00000000
##
## $uppCI
## CA125 CDX2 CK17 CK5 HMGA2 Muc5
## CA125 1.00000000 -0.07139553 0.4381845 0.46826847 0.7753870 0.03141959
## CDX2 -0.07139553 1.00000000 -0.1131129 -0.06875649 -0.2150961 0.26928919
## CK17 0.43818452 -0.11311287 1.0000000 0.47748503 0.5263557 -0.27721735
## CK5 0.46826847 -0.06875649 0.4774850 1.00000000 0.6522895 -0.10113630
## HMGA2 0.77538701 -0.21509613 0.5263557 0.65228948 1.0000000 -0.28664106
## Muc5 0.03141959 0.26928919 -0.2772173 -0.10113630 -0.2866411 1.00000000
Suppl_Fig_3b <- draw(hp, annotation_legend_list = lgd_list, ht_gap = unit(1, "cm") )
Suppl_Fig_3b
#Sanity-check:
dim(matrix_A) # 488*6 = 2928
## [1] 488 6
#get nr of NA:
sum(is.na(matrix_A)) #823
## [1] 823
2928-823
## [1] 2105
nrow(all_combined_exl)
## [1] 2105
#(dim counts) - NA = the quantified ROIs, as in the rownumbers for all_combined.
i_dim <- 2928-823
i_rows <- as.numeric(nrow(all_combined_exl))
identical(i_dim, i_rows) #2105
## [1] TRUE
pheatmap(t_m_matrix_A, fontsize_col = 6, annotation_col = m_A, annotation_colors = annot_colors_A, show_colnames = F, labels_col = "Case ID and Stain", main = "Heatmap A: Case ID and ROIs over Stains. % Positive tumor cells", cluster_cols = T, cluster_rows = T)
Fig_2e <- pheatmap(t_m_matrix_B, cluster_rows = T, cluster_cols = T, annotation_row = m_B, color=col.pal2, annotation_colors = annot_colors_B, show_rownames = F, cutree_cols = 2, main = "Heatmap B: Case ID and stains over ROI:s. Values: % Positive tumor cells")
Fig_2e
#Sanity-check:
dim(t_m_matrix_B) # 159*18
## [1] 159 18
#get nr of NA:
sum(is.na(t_m_matrix_B)) #354
## [1] 354
#(dim counts) - NA = the quantified ROIs, as in the rownumbers for all_combined.
i_dim <- (159*18)-354
i_rows <- as.numeric(nrow(all_combined))
identical(i_dim, i_rows)
## [1] TRUE
pheatmap(m_matrix_C, annotation_colors = annot_colors_Call, show_colnames = F, color=col.pal2, annotation_col = m_CC, annotation_row = m_Crows, main = "Heatmap C: Case ID over ROI:s and stains. Values: % Positive tumor cells", cluster_rows = T, cluster_cols = T)
pheatmap(C_AVG_2, cutree_rows = 2, annotation_colors = annot_colors_Call, color=col.pal2, annotation_row = m_Crows, cutree_cols = 2, main = "Heatmap C: Case ID over ROI:s and stains. Values: Average % Positive tumor cells", cluster_rows = T, cluster_cols = T)
## Warning: The input is a data frame, convert it to the matrix.
pheatmap(tmB_CK17, cutree_cols = 2, color=col.pal2, main = "Heatmap B for CK17: Case ID over ROI:s. Values: % Positive tumor cells")
## Warning: The input is a data frame, convert it to the matrix.
pheatmap(tmB_Muc5, cutree_cols = 2,color=col.pal2, main = "Heatmap B for Muc5: Case ID over ROI:s. Values: % Positive tumor cells")
## Warning: The input is a data frame, convert it to the matrix.
pheatmap(tmB_CK5, cutree_cols = 2, color=col.pal2,main = "Heatmap B for CK5: Case ID over ROI:s. Values: % Positive tumor cells")
## Warning: The input is a data frame, convert it to the matrix.
pheatmap(tmB_HMGA2, cutree_cols = 2, color=col.pal2,main = "Heatmap B for HMGA2: Case ID over ROI:s. Values: % Positive tumor cells")
## Warning: The input is a data frame, convert it to the matrix.
pheatmap(tmB_CA125, cutree_cols = 2, color=col.pal2,main = "Heatmap B for CA125: Case ID over ROI:s. Values: % Positive tumor cells")
## Warning: The input is a data frame, convert it to the matrix.
pheatmap(tmB_CDX2, cutree_cols = 2,color=col.pal2, main = "Heatmap B for CDX2: Case ID over ROI:s. Values: % Positive tumor cells")
## Warning: The input is a data frame, convert it to the matrix.
# -> only Muc5 have a complete separation
## Warning: Removed 54 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 54 rows containing missing values or values outside the scale range
## (`geom_point()`).
boxes_overall + stat_pvalue_manual(label = "p.adj.signif",
stat.test_overallavg, tip.length = 0) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) # this is alright but does not show the paired (cases)
## Warning: Removed 54 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 54 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 54 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 54 rows containing missing values or values outside the scale range
## (`geom_point()`).
nlevels(a$PKR) #31, number of unique cases
## [1] 31
a_return_perstain <- a %>%
group_by(Average_Stain) %>%
summarise(N.instances = n())
a_return_perstain #62, the double of 31. one average per case and compartment
## # A tibble: 6 × 2
## Average_Stain N.instances
## <fct> <int>
## 1 CA125 62
## 2 CDX2 62
## 3 CK17 62
## 4 CK5 62
## 5 HMGA2 62
## 6 Muc5 62
a_return_perstainandcase <- a %>%
group_by(Average_Stain, PKR) %>%
summarise(N.instances = n())
## `summarise()` has grouped output by 'Average_Stain'. You can override using the
## `.groups` argument.
a_return_perstainandcase
## # A tibble: 186 × 3
## # Groups: Average_Stain [6]
## Average_Stain PKR N.instances
## <fct> <fct> <int>
## 1 CA125 1 2
## 2 CA125 11 2
## 3 CA125 12 2
## 4 CA125 13 2
## 5 CA125 14 2
## 6 CA125 15 2
## 7 CA125 16 2
## 8 CA125 17 2
## 9 CA125 18 2
## 10 CA125 2 2
## # ℹ 176 more rows
## Warning: Removed 42 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 42 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).
#no plasticity, visualising paired:
boxes_overall_paired_noplast <- ggplot(c_noplast, aes(x = Tissue_compartment, y = Avg_value)) +
geom_boxplot(aes(fill = Tissue_compartment, color = Tissue_compartment), alpha = 0.2) +
geom_line(aes(group = PKR), colour = "gray") +
geom_point(size = 1.5, aes(fill = Tissue_compartment, color = Tissue_compartment)) +
facet_wrap(~ Average_Stain) +
stat_pvalue_manual(label = "p.adj.signif",
stat.test_overallavg_noplast, tip.length = 0) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))+
theme_clean() +
scale_color_brewer(palette= "Accent") +
scale_fill_brewer(palette = "Accent") +
labs(title = "Average expression of PDAC cells in Lobule and Stroma", subtitle = "Subset: Cases with no plasticity") +
xlab(NULL) + ylab("Average + tumor cells") +
theme(plot.margin = margin(1.1, 1.1, 1.1, 1.1, "cm")) +
ylim(-1, 120)
boxes_overall_paired_noplast
## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).
The index ranges between 0-1, where 1 = high diversity and 0 = no diversity
Matrix with the distribution scores: Distribution levels; below called “Intensity scores. This category represents the fraction of positive cells in each ROI:
sim_all_c6$`2` #Example of how the data matrix looks like, for just one case, PKR-2
## CA125 CDX2 CK17 CK5 HMGA2 Muc5
## 0.8104575 0.5032680 0.8104575 0.6601307 0.7124183 0.8366013
ggplot(simpsons_2, aes(x=PKR, y=Simpsons,
color = Stain, fill = Stain,
position = "dodge")) +
geom_bar(stat = "identity", width=0.6, position = position_dodge2(width = 0.5, preserve = "single")) +
scale_fill_brewer(palette = "PuOr") +
scale_color_brewer(palette = "PuOr") +
geom_hline(yintercept = simpline,linetype = "longdash") +
geom_text(aes(29.5, simpline, label = "3rd quantile", vjust = -1), col = "brown") +
labs(title = "Simpson index of all cases and stains") +
theme_clean()
ggplot(simpsons_lgpval_2, aes(x=PKR,
color = Stain, fill = Stain,
position = "dodge")) +
geom_bar(aes(y = p_val), stat = "identity", width=0.6, position = position_dodge2(width = 0.5, preserve = "single")) +
geom_bar(aes(y = Simpsons), stat = "identity", width=0.6, position = position_dodge2(width = 0.5, preserve = "single")) +
scale_fill_brewer(palette= "BrBG", type = "div") +
scale_color_brewer(palette= "BrBG", type = "div") +
geom_hline(yintercept = log10(pline), linetype = "dashed", color = "black") +
geom_text(aes(29.5, simpline, label = "3rd quantile", vjust = -1), size = 3, col = "black") +
geom_text(aes(28.8, log10(pline), label = "p = 0.05", vjust = -1), size = 3, col = "black") +
geom_hline(yintercept = simpline,linetype = "dashed", col = "black") +
geom_hline(yintercept = 0,linetype = "solid", color = "black")+
labs(title = "p-values of wilcoxon test and Simpsons for all cases and stains") +
ylab(c("lg(adjusted p-values from wilxocon testing) Simpson index of diversity")) +
theme_clean()
# Scatterplot
x_line <- simpsons_pval_2$p_val
y_line <- simpsons_pval_2$Simpsons
scatter_simppval <- ggplot(simpsons_pval_2, aes(p_val, Simpsons, color = Stain), size = 4, alpha = 0.8)
Suppl_Fig_10a <- scatter_simppval + geom_point(aes(alpha = 0.7, size = 0.5)) +
ggtitle("scatterplot of p-values and simpsons index: Suppl fig 10a") +
geom_hline(yintercept = simpline,linetype = "dashed", col = "black") +
geom_text(aes(0.75, simpline, label = "3rd quantile", vjust = -1), size = 4, col = "black") +
geom_vline(xintercept = pline, linetype = "dashed", color = "black") +
geom_text(aes(0.15, pline, label = "p = 0.05 ", vjust = -1), size = 4, col = "black") +
scale_color_manual(values =
c("CK17"="#e41a1c", "CK5"="#984ea3", "HMGA2" = "#ff7f00", "CA125" = "#ffff33", "CDX2" = "#4daf4a", "Muc5" = "#377eb8"))+
theme_clean()
Suppl_Fig_10a
## Warning in geom_text(aes(0.75, simpline, label = "3rd quantile", vjust = -1), : All aesthetics have length 1, but the data has 159 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_text(aes(0.15, pline, label = "p = 0.05 ", vjust = -1), : All aesthetics have length 1, but the data has 159 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
scatter_simppval + geom_point(aes(alpha = 0.7, size = 3)) +
ggtitle("scatterplot of p-values and simpsons index") +
geom_hline(yintercept = simpline,linetype = "dashed", col = "black") +
geom_text(aes(0.75, simpline, label = "3rd quantile", vjust = -1), size = 4, col = "black") +
geom_vline(xintercept = pline, linetype = "dashed", color = "black") +
geom_text(aes(0.15, pline, label = "p = 0.05 ", vjust = -1), size = 4, col = "black") +
scale_color_manual(values =
c("CK17"="#e41a1c", "CK5"="#984ea3", "HMGA2" = "#ff7f00", "CA125" = "#ffff33", "CDX2" = "#4daf4a", "Muc5" = "#377eb8"))+
geom_smooth(method = "lm", se = FALSE) +
theme_clean()
## Warning in geom_text(aes(0.75, simpline, label = "3rd quantile", vjust = -1), : All aesthetics have length 1, but the data has 159 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## All aesthetics have length 1, but the data has 159 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## `geom_smooth()` using formula = 'y ~ x'
simpsons_pval_3 <- simpsons_pval_2
simpsons_pval_3$Event_simp_above <- factor(186)
simpsons_pval_3$Event_pval_below <- factor(186)
simpsons_pval_3 <- simpsons_pval_3 %>% mutate(Event_simp_above = ifelse(Simpsons >= simpline,
'true', .$Event_simp_above))
simpsons_pval_3 <- simpsons_pval_3 %>% mutate(Event_pval_below = ifelse(p_val <= pline,
'true', .$Event_pval_below))
simpsons_pval_3 <- simpsons_pval_3 %>% mutate(Event_simp_above = ifelse(Event_simp_above != "true",
'false', .$Event_simp_above))
simpsons_pval_3 <- simpsons_pval_3 %>% mutate(Event_pval_below = ifelse(Event_pval_below != "true",
'false', .$Event_pval_below))
simpsons_pval_4 <- simpsons_pval_3[, c(1, 3, 5:7)]
simpsons_pval_4[is.na(simpsons_pval_4)] <- 'false'
simpsons_venn <- simpsons_pval_4[, c(3, 4, 5)]
simpsons_venn <- simpsons_venn %>% mutate(across(starts_with("Event"), as.logical))
Vennfunc <- lapply(simpsons_venn[, c(2, 3)], function (x)
which(x == 1))
Venn_simp_pval <- ggVennDiagram(Vennfunc)
Suppl_Fig_10b_old <- Venn_simp_pval +
scale_fill_gradient(low = "#0072B2", high = "#CC79A7")
scale_colour_gradient(low = "#0072B2", high = "#CC79A7")
## <ScaleContinuous>
## Range:
## Limits: 0 -- 1
theme_cleantable()
## List of 7
## $ axis.title.x: list()
## ..- attr(*, "class")= chr [1:2] "element_blank" "element"
## $ axis.title.y: list()
## ..- attr(*, "class")= chr [1:2] "element_blank" "element"
## $ axis.text.x : list()
## ..- attr(*, "class")= chr [1:2] "element_blank" "element"
## $ axis.ticks.x: list()
## ..- attr(*, "class")= chr [1:2] "element_blank" "element"
## $ axis.ticks.y: list()
## ..- attr(*, "class")= chr [1:2] "element_blank" "element"
## $ axis.line.x : list()
## ..- attr(*, "class")= chr [1:2] "element_blank" "element"
## $ axis.line.y : list()
## ..- attr(*, "class")= chr [1:2] "element_blank" "element"
## - attr(*, "class")= chr [1:2] "theme" "gg"
## - attr(*, "complete")= logi FALSE
## - attr(*, "validate")= logi TRUE
Suppl_Fig_10b_old #color changed
# Venn, version where the size of Venn circles correspond to the numbers of cells in the respective group
# colors now correspond to the group identity instead of nr of ROIs in each group.
listed_z <- NULL
propVenn <- draw.venn(list_x = Vennfunc$Event_simp_above, list_y = Vennfunc$Event_pval_below, list_z = listed_z, title = "Venn diagram of overlapping measures of heterogeneity", xtitle = "Simpsons above threshold", ytitle = "p−value sign", x_c = "#F1BB7B", y_c = "#FD6467", nrtype = "pct")
## [1] "x total: 40"
## [1] "y total: 48"
## [1] "z total: 0"
## [1] "x only: 21"
## [1] "y only: 29"
## [1] "z only: 0"
## [1] "x-y total overlap: 19"
## [1] "x-z total overlap: 0"
## [1] "y-z total overlap: 0"
## [1] "x-y only overlap: 19"
## [1] "x-z only overlap: 0"
## [1] "y-z only overlap: 0"
## [1] "x-y-z overlap: 0"
Suppl_Fig_10b_new <- propVenn
Suppl_Fig_10b_new
## $x
## [1] 10 11 19 20 23 25 32 44 58 62 65 68 71 72 75 76 77 81 86
## [20] 92 108 112 113 114 116 117 120 122 125 126 127 128 130 132 133 134 136 138
## [39] 151 157
##
## $y
## [1] 1 5 7 10 11 30 36 39 42 48 55 60 62 63 65 67 68 70 71
## [20] 73 74 76 77 78 81 83 86 88 92 93 98 100 114 117 120 121 122 123
## [39] 126 128 132 135 136 137 140 147 152 154
##
## $z
## NULL
##
## $x_only
## [1] 19 20 23 25 32 44 58 72 75 108 112 113 116 125 127 130 133 134 138
## [20] 151 157
##
## $y_only
## [1] 1 5 7 30 36 39 42 48 55 60 63 67 70 73 74 78 83 88 93
## [20] 98 100 121 123 135 137 140 147 152 154
##
## $z_only
## NULL
##
## $xy
## [1] 10 11 62 65 68 71 76 77 81 86 92 114 117 120 122 126 128 132 136
##
## $xz
## NULL
##
## $yz
## NULL
##
## $xy_only
## [1] 10 11 62 65 68 71 76 77 81 86 92 114 117 120 122 126 128 132 136
##
## $xz_only
## NULL
##
## $yz_only
## NULL
##
## $xyz
## NULL
simppval_return <- simpsons_venn %>%
group_by(Event_pval_below, Event_simp_above) %>%
summarise(N.events = n())
## `summarise()` has grouped output by 'Event_pval_below'. You can override using
## the `.groups` argument.
simppval_return
## # A tibble: 4 × 3
## # Groups: Event_pval_below [2]
## Event_pval_below Event_simp_above N.events
## <lgl> <lgl> <int>
## 1 FALSE FALSE 90
## 2 FALSE TRUE 21
## 3 TRUE FALSE 29
## 4 TRUE TRUE 19
simpson_return_perstain <- simpsons_pval_2 %>%
group_by(Stain) %>%
summarise(N.instances = n())
simpson_return_perstain
## # A tibble: 6 × 2
## Stain N.instances
## <fct> <int>
## 1 CA125 30
## 2 CDX2 28
## 3 CK17 31
## 4 HMGA2 21
## 5 Muc5 31
## 6 CK5 18
sum(simpson_return_perstain$N.instances)
## [1] 159
ggplot(exlg_simpson_pval_2, aes(x=PKR,
color = Stain, fill = Stain,
position = "dodge")) +
geom_bar(aes(y = p_val), stat = "identity", width=0.6, position = position_dodge2(width = 0.5, preserve = "single")) +
geom_bar(aes(y = Simpsons), stat = "identity", width=0.6, position = position_dodge2(width = 0.5, preserve = "single")) +
scale_fill_brewer(palette = "PuOr") +
scale_color_brewer(palette = "PuOr") +
geom_hline(yintercept = log10(pline), linetype = "dashed", color = "brown") +
geom_hline(yintercept = simpline,linetype = "dashed", color = "brown") +
geom_hline(yintercept = 0,linetype = "solid", color = "black")+
geom_text(aes(4.4, log10(pline), label = "p < 0.05", vjust = 2), col = "brown") +
geom_text(aes(4.4, simpline, label = "3rd quantile", vjust = -1), col = "brown") +
labs(title = "p-values of wilcoxon test and Simpsons") +
ylab(c("log(adjusted p-values from wilxocon testing) Simpson index of diversity")) +
ylim(-3.8, 1.5) +
theme_clean()
ggplot(het, aes(x=PKR, position = "dodge", color = Group, fill = Group)) +
geom_bar(aes(y = Heterogeneity_Cat), stat = "identity", width=0.6, position = position_dodge2(width = 0.5, preserve = "single")) +
geom_bar(aes(y = -Plasticity_Cat), stat = "identity", width = 0.6, position = position_dodge2(width = 0.5, preserve = "single")) +
scale_fill_brewer(palette = "Accent") +
scale_color_brewer(palette = "Accent") +
geom_hline(yintercept = 0,linetype = "solid", color = "black")+
labs(title = "Comparing heterogeneity measures") +
ylab(c("Plasticity by Wilcox Heterogeneity by Simpsons")) +
scale_y_continuous(breaks=c(-3, -2, -1, 1, 2, 3), label= c("Extensive", "Moderate", "Slight", "Slight", "Moderate", "Extensive")) +
theme_clean()
## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## ℹ Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
sp +
facet_grid(ROI ~ .)+ #sort on (basal like) expression in stroma.
labs(title = "Ratio of classical and basal-like expressing cells in stromal and acinar ROIs", subtitle = "All cases")
Ratio_joined +
facet_grid(ROI ~ .) +
geom_point(aes(color=Plasticity, alpha = Group)) +
scale_colour_manual(values = c(Extensive = "#4D004B", Moderate = "#8C6BB1", Slight = "#9EBCDA", None = "#808080", Classical = "#97C8FF", Basal = "#FF6242")) +
scale_alpha_discrete(range=c(0, 1))+
labs(title = "Ratio of all positive cells separated on compartment", subtitle = "All cases")
## Warning: Using alpha for a discrete variable is not advised.
sp_noplast +
facet_grid(ROI ~ .) #sort on (basal like) expression in stroma.
distrib_stroma_basal
distrib_stroma_CK17
distrib_stroma_CK5
distrib_stroma_CA125
distrib_stroma_HMGA2
distrib_stroma_classical
distrib_stroma_Muc5
distrib_stroma_CDX2
distrib_lobule_basal
distrib_lobule_CK17
distrib_lobule_CK5
distrib_lobule_HMGA2
distrib_lobule_CA125
distrib_lobule_classical
distrib_lobule_Muc5
distrib_lobule_CDX2
Density plots, separated on compartment using the datasets distrib_s and distrib_a
density_stroma <- distrib_s %>%
ggplot(aes(x = Tumor..Positive.., fill = Stain)) +
geom_density(color="#e9ecef", alpha=0.8) +
facet_grid(~Stain)
density_stroma
#As ridgeline
ridges_stroma <- distrib_s %>%
ggplot(aes(x = Tumor..Positive.., y = Stain, fill = Stain)) +
geom_density_ridges(color="#e9ecef", alpha=0.8)
ridges_stroma
## Picking joint bandwidth of 5.49
#As ridgeline with color gradient
ridges_gradient_stroma <- ggplot(distrib_s, aes(x = `Tumor..Positive..`, y = `Stain`, fill = ..x..)) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis(name = "Temp. [F]", option = "E") +
labs(title = 'Density of stains in stroma') +
theme_clean() +
theme(
legend.position="none",
panel.spacing = unit(0.1, "lines"),
strip.text.x = element_text(size = 8)
)+
xlab("% Positive cells") + ylab(NULL)
ridges_gradient_stroma
## Warning: The dot-dot notation (`..x..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(x)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Picking joint bandwidth of 5.49
# Violin plots
violin_stroma_trimmed <- distrib_s %>%
ggplot(aes(x = Stain, y = Tumor..Positive.., fill = Stain)) +
geom_violin(trim = TRUE) +
theme_clean()
violin_stroma_trimmed
density_lobule <- distrib_a %>%
ggplot(aes(x = Tumor..Positive.., fill = Stain)) +
geom_density(color="#e9ecef", alpha=0.8) +
facet_grid(~Stain)
density_lobule
#As ridgeline
ridges_lobule <- distrib_a %>%
ggplot(aes(x = Tumor..Positive.., y = Stain, fill = Stain)) +
geom_density_ridges(color="#e9ecef", alpha=0.8)
ridges_lobule
## Picking joint bandwidth of 4.77
#As ridgeline with color gradient
ridges_gradient_lobule <- ggplot(distrib_a, aes(x = `Tumor..Positive..`, y = `Stain`, fill = ..x..)) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis(name = "Temp. [F]", option = "E") +
labs(title = 'Density of stains in Lobule') +
theme_clean() +
theme(
legend.position="none",
panel.spacing = unit(0.1, "lines"),
strip.text.x = element_text(size = 8)
) +
xlab("% Positive cells") + ylab(NULL)
ridges_gradient_lobule
## Picking joint bandwidth of 4.77
# Violin plots
violin_acinar_trimmed <- distrib_a %>%
ggplot(aes(x = Stain, y = Tumor..Positive.., fill = Stain)) +
geom_violin(trim = TRUE) +
theme_clean()
violin_acinar_trimmed
#Violin plots for both compartments
distrib_all <- all_combined[, c(1, 2, 3, 5, 11, 12)]
violin_both_untrimmed <- distrib_all %>%
ggplot(aes(x = Stain, y = Tumor..Positive.., color = Tissue_compartment)) +
geom_violin(trim = FALSE, adjust = 2) +
theme_calc()
violin_both_untrimmed
Plasticity_ID <- m_Crows
Plasticity_ID$PKR <- rownames(Plasticity_ID)
# 'Basal' and 'Classical' both go to the moderate category
Plasticity_ID %>% count(Plasticity)
## Plasticity n
## 1 Basal 2
## 2 Classical 1
## 3 Extensive 4
## 4 Moderate 7
## 5 None 7
## 6 Slight 10
Plasticity_ID <- Plasticity_ID %>% mutate(Plasticity = ifelse(grepl('Basal', .$Plasticity), 'Moderate', .$Plasticity))
Plasticity_ID <- Plasticity_ID %>% mutate(Plasticity = ifelse(grepl('Classical', .$Plasticity), 'Moderate', .$Plasticity))
Plasticity_ID %>% count(Plasticity)
## Plasticity n
## 1 Extensive 4
## 2 Moderate 10
## 3 None 7
## 4 Slight 10
# Fraction of any type of switching; 24/31 ≈ 77,4%
range(all_combined_exl$Num.Detections)
## [1] 299 864
# between 299-864 cells in ROIs.
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-apple-darwin20
## Running under: macOS 15.4.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/Stockholm
## tzcode source: internal
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] hrbrthemes_0.8.7 ggridges_0.5.6 forcats_1.0.0
## [4] stringr_1.5.1 Hmisc_5.2-3 BioVenn_1.1.3
## [7] survminer_0.5.0 dichromat_2.0-0.1 ggVennDiagram_1.5.2
## [10] RColorBrewer_1.1-3 viridis_0.6.5 viridisLite_0.4.2
## [13] ComplexHeatmap_2.22.0 patchwork_1.3.0 corrplot_0.95
## [16] reshape2_1.4.4 data.table_1.17.0 ggpubr_0.6.0
## [19] ggplot2_3.5.2 ggthemes_5.1.0 reactable_0.4.4
## [22] rstatix_0.7.2 dplyr_1.1.4 tidyr_1.3.1
##
## loaded via a namespace (and not attached):
## [1] rstudioapi_0.17.1 jsonlite_2.0.0 shape_1.4.6.1
## [4] magrittr_2.0.3 farver_2.1.2 rmarkdown_2.29
## [7] GlobalOptions_0.1.2 zlibbioc_1.52.0 vctrs_0.6.5
## [10] Cairo_1.6-2 memoise_2.0.1 base64enc_0.1-3
## [13] htmltools_0.5.8.1 progress_1.2.3 plotrix_3.8-4
## [16] curl_6.2.2 broom_1.0.8 Formula_1.2-5
## [19] sass_0.4.10 bslib_0.9.0 htmlwidgets_1.6.4
## [22] plyr_1.8.9 httr2_1.1.2 zoo_1.8-14
## [25] cachem_1.1.0 lifecycle_1.0.4 iterators_1.0.14
## [28] pkgconfig_2.0.3 Matrix_1.7-3 R6_2.6.1
## [31] fastmap_1.2.0 GenomeInfoDbData_1.2.13 clue_0.3-66
## [34] digest_0.6.37 colorspace_2.1-1 AnnotationDbi_1.68.0
## [37] S4Vectors_0.44.0 crosstalk_1.2.1 textshaping_1.0.1
## [40] RSQLite_2.3.11 reactR_0.6.1 labeling_0.4.3
## [43] filelock_1.0.3 km.ci_0.5-6 mgcv_1.9-3
## [46] httr_1.4.7 abind_1.4-8 compiler_4.4.1
## [49] fontquiver_0.2.1 bit64_4.6.0-1 withr_3.0.2
## [52] doParallel_1.0.17 htmlTable_2.4.3 backports_1.5.0
## [55] carData_3.0-5 DBI_1.2.3 Rttf2pt1_1.3.12
## [58] ggsignif_0.6.4 biomaRt_2.62.1 rappdirs_0.3.3
## [61] rjson_0.2.23 ggsci_3.2.0 tools_4.4.1
## [64] foreign_0.8-90 extrafontdb_1.0 nnet_7.3-20
## [67] glue_1.8.0 nlme_3.1-168 checkmate_2.3.2
## [70] cluster_2.1.8.1 generics_0.1.4 gtable_0.3.6
## [73] KMsurv_0.1-5 hms_1.1.3 utf8_1.2.5
## [76] xml2_1.3.8 car_3.1-3 XVector_0.46.0
## [79] BiocGenerics_0.52.0 foreach_1.5.2 pillar_1.10.2
## [82] circlize_0.4.16 splines_4.4.1 BiocFileCache_2.14.0
## [85] lattice_0.22-7 survival_3.8-3 bit_4.6.0
## [88] tidyselect_1.2.1 fontLiberation_0.1.0 Biostrings_2.74.1
## [91] knitr_1.50 fontBitstreamVera_0.1.1 gridExtra_2.3
## [94] IRanges_2.40.1 svglite_2.2.0 stats4_4.4.1
## [97] xfun_0.52 Biobase_2.66.0 matrixStats_1.5.0
## [100] stringi_1.8.7 UCSC.utils_1.2.0 yaml_2.3.10
## [103] evaluate_1.0.3 codetools_0.2-20 extrafont_0.19
## [106] gdtools_0.4.2 tibble_3.2.1 cli_3.6.5
## [109] rpart_4.1.24 xtable_1.8-4 systemfonts_1.2.3
## [112] jquerylib_0.1.4 survMisc_0.5.6 Rcpp_1.0.14
## [115] GenomeInfoDb_1.42.3 dbplyr_2.5.0 png_0.1-8
## [118] parallel_4.4.1 blob_1.2.4 prettyunits_1.2.0
## [121] scales_1.4.0 purrr_1.0.4 crayon_1.5.3
## [124] GetoptLong_1.0.5 rlang_1.1.6 KEGGREST_1.46.0